libname home "/scratch/baruch/";

option compress=yes;

%let timer = %sysfunc(datetime());
options nonotes;  

%macro NASDAQ_before2015(year1, year2);

proc datasets nolist lib=work; delete _out; quit;

%do date = &year1 %to &year2;
%if %sysfunc(exist(taq.ct_&date.)) %then %do;

proc datasets nolist lib=work; 
 delete _f1 _f11 _f11_unique _f12 _f1_unique 
        _f2 _f3 _s1 _s2 _s3 _t1 _t11 _t11_unique _t12 _t1_unique _t2 _t3 _oneday _onedayy; 
run; quit;

********************************************************************************************;
********************************************************************************************;
***** 00:00 to 9:45 ************************************************************************;

data _f1; 
 set taq.ct_&date.; 
 keep DATE TIME EX SYMBOL SIZE COND;
 where Corr=0 and size>0 and price>1 and
       /* trades during 00:00 to 9:45, submitted from NYSE */
       time >= hms(0,00,0) and time < hms(9,45,0) and ex in ("T" "Q");
run;

/* In TAQ, COND = "O" or "Q" are indicators for opening auction,
           COND = "M" or "6" are indicators for closing auction */
/* Sometimes, TAQ reports duplicates trades for closing and opening, 
           that is, using "O" to report once, and then immediately uses "Q" to report the same trade again */
/* So we need to eliminate these potential duplicates */

/* opening auction trades, eliminate potential duplicates */
data _f11; set _f1; where COND contains 'O' or COND contains 'Q'; run;
proc sort data= _f11 out= _f11_unique nodupkey; by date symbol ex size; run;

/* regular trades */
data _f12; set _f1; where COND not contains 'O' and COND not contains 'Q'; run;

/* combine non-duplicate opening auction trades with non-auction trades */
data _f1_unique; set _f11_unique _f12; run;

/* sum the trades during 00:00 to 9:45 */
proc sql;
 create table _f2 as 
 select distinct date, symbol, ex, sum(size) as volume
 from _f1_unique
 group by date, symbol, ex
 order by date, symbol, ex 
;
quit;

/* rearrange results */
data _f3; retain DATE time symbol ex volume; set _f2; time = hms(9,45,0); format time TIME20.9; run;


********************************************************************************************;
********************************************************************************************;
***** 9:45 to 15:45 ************************************************************************;

data _s1; 
 set taq.ct_&date.; 
 keep DATE TIME EX SYMBOL SIZE COND;
 where Corr=0 and size>0 and price>1 and
       time >= hms(9,45,0) and time < hms(15,45,0) and ex in ("T" "Q");
run;

%let t = %sysfunc(hms(9,45,0)); %let interval = 15*60; 
data _s2; set _s1; format time TIME20.9; 
 %do %until (&t. >= %sysfunc(hms(15,45,0)));
  %let start = %sysevalf(&t.); %let end = %sysevalf(&t. + &interval.);   
  if time >= &start. and time < &end. then do; time_m = &end.; end;
  %let t = %sysevalf(&t. + &interval.);
 %end;
run;

proc sql;
 create table _s3 as
 select distinct date, time_m as time, symbol, ex, sum(size) as volume
 from _s2
 group by date, symbol, ex, time_m
 order by date, symbol, ex, time_m
;
quit;

********************************************************************************************;
********************************************************************************************;
***** 15:45 to 16:01 ***********************************************************************;

data _t1; 
 set taq.ct_&date.; 
 keep DATE TIME EX SYMBOL SIZE COND;
 where Corr=0 and size>0 and price>1 and
       /* trades during 15:45 to 16:01, submitted from NYSE */
       time >= hms(15,45,0) and time <= hms(16,01,0) and ex in ("T" "Q");
run;

/* In TAQ, COND = "O" or "Q" are indicators for opening auction,
           COND = "M" or "6" are indicators for closing auction */
/* Sometimes, TAQ reports duplicates trades for closing and opening, 
           that is, using "M" to report once, and then immediately uses "6" to report the same trade again */
/* So we need to eliminate these potential duplicates */

/* closing auction trades */
data _t11; set _t1; where COND contains 'M' or COND contains '6'; run;
proc sort data= _t11 out= _t11_unique nodupkey; by date symbol ex size; run;

/* regular trades */
data _t12; set _t1; where COND not contains 'M' and COND not contains '6'; run;

/* combine non-duplicate closing auction trades with non-auction trades */
data _t1_unique; set _t12 _t11_unique; run;

proc sql;
 create table _t2 as 
 select distinct date, symbol, ex, sum(size) as volume
 from _t1_unique
 group by date, symbol, ex 
 order by date, symbol, ex
;
quit;

data _t3; retain DATE time symbol EX volume; set _t2; time = hms(16,01,0); format time TIME20.9; run;


********************************************************************************************;
********************************************************************************************;
***** 00:00 to 16:01: one day **************************************************************;

data _oneday; set _f3 _s3 _t3; run;

%let ym = %substr(&date., 1, 6);

proc sql;
	create table _onedayy as
	select distinct a.*, b.cusip
	from _oneday as a left join taq.mast_&ym. as b
	on a.symbol=b.symbol 
	where a.date >= b.datef
	order by date, symbol, ex, time;
quit;


********************************************************************************************;
********************************************************************************************;
***** 00:00 to 16:01: append all days*******************************************************;
proc datasets nolist lib=work; append base=_out data=_onedayy force; run; quit;
%end; %end;

%mend;

%NASDAQ_before2015(year1=20000101, year2=20141231);

/* save data to home dictionary */
data home.NASDAQ00to14; set _out; run; 

/* options notes; */
data _null_;
dur = datetime() - &timer;
put 30*'-' / ' TOTAL DURATION:' dur time13.2 / 30*'-';
run; 
